41a216cayFe2FQroFuzvNPw1AvNiqQ tools/libxutil/util.c
41a216ca7mgVSnCBHPCLkGOIqPS1CQ tools/libxutil/util.h
3f776bd2Xd-dUcPKlPN2vG89VGtfvQ tools/misc/Makefile
+4225f56d7sa9aEARfjNeCVTMYDAmZA tools/misc/cpuperf/Makefile
+4225f56dS5TGdKojmuBnrV3PzbE6Rg tools/misc/cpuperf/README.txt
+4225f56dcodvBSPoWYS6kvwZCQhgzg tools/misc/cpuperf/cpuperf.c
+4225f56dMjZK14EWd8K0gq4v5Diwjg tools/misc/cpuperf/cpuperf_perfcntr.h
+4225f56d_XjSY1297IiH96qeqD4sCA tools/misc/cpuperf/cpuperf_xeno.h
+4225f56dqlGC_UZ681F95mCgLbOeHQ tools/misc/cpuperf/module/Makefile
+4225f56dnmms-VFr1MiDVG_dYoM7IQ tools/misc/cpuperf/module/perfcntr.c
+4225f56dYhIGQRD_kKVJ6xQrkqO0YQ tools/misc/cpuperf/p4perf.h
40ab2cfawIw8tsYo0dQKtp83h4qfTQ tools/misc/fakei386xen
3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile
3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README
u32 op,
xc_perfc_desc_t *desc);
+/* read/write msr */
+long long xc_msr_read(int xc_handle, int cpu_mask, int msr);
+int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
+ unsigned int high);
+
/**
* Memory maps a range within one domain to a local address range. Mappings
* should be unmapped with munmap and should follow the same rules as mmap
return (rc == 0) ? dop.u.perfccontrol.nr_counters : rc;
}
+
+/*
+ * Read an MSR through the DOM0_MSR hypercall.
+ *
+ * xc_handle: handle passed through to do_dom0_op().
+ * cpu_mask:  forwarded unchanged in op.u.msr.cpu_mask.
+ * msr:       MSR index, forwarded in op.u.msr.msr.
+ *
+ * Returns (out2 << 32) | out1 as left in the request after the call.
+ * The return type has no room for an error code, so the two output
+ * fields are cleared up front: if do_dom0_op() fails and leaves them
+ * untouched the caller sees 0 instead of uninitialised stack contents.
+ */
+long long xc_msr_read(int xc_handle, int cpu_mask, int msr)
+{
+    dom0_op_t op;
+
+    op.cmd = DOM0_MSR;
+    op.u.msr.write = 0;
+    op.u.msr.msr = msr;
+    op.u.msr.cpu_mask = cpu_mask;
+    op.u.msr.out1 = 0;
+    op.u.msr.out2 = 0;
+
+    /* The status cannot be returned; a failure shows up as a zero reading. */
+    (void)do_dom0_op(xc_handle, &op);
+
+    return (((unsigned long long)op.u.msr.out2)<<32) | op.u.msr.out1 ;
+}
+
+/*
+ * Write an MSR through the DOM0_MSR hypercall.
+ *
+ * `low` is placed in in1 and `high` in in2 of the request; cpu_mask and
+ * msr are forwarded unchanged. Returns whatever do_dom0_op() returns.
+ */
+int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
+                 unsigned int high)
+{
+    dom0_op_t request;
+
+    request.cmd            = DOM0_MSR;
+    request.u.msr.write    = 1;
+    request.u.msr.msr      = msr;
+    request.u.msr.cpu_mask = cpu_mask;
+    request.u.msr.in1      = low;
+    request.u.msr.in2      = high;
+
+    return do_dom0_op(xc_handle, &request);
+}
all: $(TARGETS)
$(MAKE) -C miniterm
+ $(MAKE) -C cpuperf
install: all
[ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
[ -d $(DESTDIR)/usr/sbin ] || $(INSTALL_DIR) $(DESTDIR)/usr/sbin
$(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin
$(INSTALL_PROG) $(INSTALL_SBIN) $(DESTDIR)/usr/sbin
+ $(MAKE) -C cpuperf install
# No sense in installing miniterm on the Xen box.
# $(MAKE) -C miniterm install
clean:
$(RM) *.o $(TARGETS) *~
$(MAKE) -C miniterm clean
+ $(MAKE) -C cpuperf clean
%.o: %.c $(HDRS) Makefile
$(CC) -c $(CFLAGS) -o $@ $<
--- /dev/null
+#
+# Make Performance counter tool
+#
+# $Id: Makefile,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
+#
+# $Log: Makefile,v $
+# Revision 1.1 2003/10/13 16:49:44 jrb44
+# Initial revision
+#
+#
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+# these are for Xen
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+CC = gcc
+CFLAGS = -Wall -O3
+
+HDRS = $(wildcard *.h)
+SRCS = $(wildcard *.c)
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+
+TARGETS = cpuperf-xen cpuperf-perfcntr
+
+INSTALL_BIN = $(TARGETS)
+
+
+# all, clean and install name actions rather than files.
+.PHONY: all clean install
+
+all: $(TARGETS)
+
+clean:
+	$(RM) *.o $(TARGETS)
+
+# Generic rule: build a program from the single C file of the same name.
+%: %.c $(HDRS) Makefile
+	$(CC) $(CFLAGS) -o $@ $<
+
+# Xen backend. The libraries are listed after the source file so the
+# linker has already seen the undefined references when it scans them.
+cpuperf-xen: cpuperf.c $(HDRS) Makefile
+	$(CC) $(CFLAGS) -DXENO -I $(XEN_LIBXC) -o $@ $< -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -lxc -lxutil
+
+# /proc/perfcntr backend (needs the kernel module under module/).
+cpuperf-perfcntr: cpuperf.c $(HDRS) Makefile
+	$(CC) $(CFLAGS) -DPERFCNTR -o $@ $<
+
+# Create the destination directory first so a stand-alone
+# "make install" works without the parent Makefile having run.
+install: all
+	[ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
+	$(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin
+
+
+# End of $RCSfile: Makefile,v $
+
--- /dev/null
+Usage
+=====
+
+Use either cpuperf-xen or cpuperf-perfcntr, as appropriate to the system
+in use.
+
+To write:
+
+ cpuperf -E <escr> -C <cccr>
+
+ optional: all numbers in base 10 unless specified
+
+ -d Debug mode
+ -c <cpu> CPU number
+ -t <thread> ESCR thread bits - default is 12 (Thread 0 all rings)
+ bit 0: Thread 1 in rings 1,2,3
+ bit 1: Thread 1 in ring 0
+ bit 2: Thread 0 in rings 1,2,3
+ bit 3: Thread 0 in ring 0
+ -e <eventsel> Event selection number
+ -m <eventmask> Event mask bits
+ -T <value> ESCR tag value
+ -k Sets CCCR 'compare' bit
+ -n Sets CCCR 'complement' bit
+ -g Sets CCCR 'edge' bit
+ -P <bit> Set the specified bit in MSR_P4_PEBS_ENABLE
+ -V <bit> Set the specified bit in MSR_P4_PEBS_MATRIX_VERT
+ (-V and -P may be used multiple times to set multiple bits.)
+
+To read:
+
+ cpuperf -r
+
+ optional: all numbers in base 10 unless specified
+
+ -c <cpu> CPU number
+
+<cccr> values:
+
+ BPU_CCCR0
+ BPU_CCCR1
+ BPU_CCCR2
+ BPU_CCCR3
+ MS_CCCR0
+ MS_CCCR1
+ MS_CCCR2
+ MS_CCCR3
+ FLAME_CCCR0
+ FLAME_CCCR1
+ FLAME_CCCR2
+ FLAME_CCCR3
+ IQ_CCCR0
+ IQ_CCCR1
+ IQ_CCCR2
+ IQ_CCCR3
+ IQ_CCCR4
+ IQ_CCCR5
+ NONE - do not program any CCCR, used when setting up an ESCR for tagging
+
+<escr> values:
+
+ BSU_ESCR0
+ BSU_ESCR1
+ FSB_ESCR0
+ FSB_ESCR1
+ MOB_ESCR0
+ MOB_ESCR1
+ PMH_ESCR0
+ PMH_ESCR1
+ BPU_ESCR0
+ BPU_ESCR1
+ IS_ESCR0
+ IS_ESCR1
+ ITLB_ESCR0
+ ITLB_ESCR1
+ IX_ESCR0
+ IX_ESCR1
+ MS_ESCR0
+ MS_ESCR1
+ TBPU_ESCR0
+ TBPU_ESCR1
+ TC_ESCR0
+ TC_ESCR1
+ FIRM_ESCR0
+ FIRM_ESCR1
+ FLAME_ESCR0
+ FLAME_ESCR1
+ DAC_ESCR0
+ DAC_ESCR1
+ SAAT_ESCR0
+ SAAT_ESCR1
+ U2L_ESCR0
+ U2L_ESCR1
+ CRU_ESCR0
+ CRU_ESCR1
+ CRU_ESCR2
+ CRU_ESCR3
+ CRU_ESCR4
+ CRU_ESCR5
+ IQ_ESCR0
+ IQ_ESCR1
+ RAT_ESCR0
+ RAT_ESCR1
+ SSU_ESCR0
+ SSU_ESCR1
+ ALF_ESCR0
+ ALF_ESCR1
+
+
+Example configurations
+======================
+
+Note that in most cases there is a choice of ESCRs and CCCRs for
+each metric, although not all combinations are allowed. Each ESCR and
+counter/CCCR can be used only once.
+
+Mispredicted branches retired
+=============================
+
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 3 -m 1
+cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 3 -m 1
+cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 3 -m 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 3 -m 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 3 -m 1
+
+Tracecache misses
+=================
+
+cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1
+cpuperf -E BPU_ESCR0 -C BPU_CCCR1 -e 3 -m 1
+cpuperf -E BPU_ESCR1 -C BPU_CCCR2 -e 3 -m 1
+cpuperf -E BPU_ESCR1 -C BPU_CCCR3 -e 3 -m 1
+
+I-TLB
+=====
+
+cpuperf -E ITLB_ESCR0 -C BPU_CCCR0 -e 24
+cpuperf -E ITLB_ESCR0 -C BPU_CCCR1 -e 24
+cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24
+cpuperf -E ITLB_ESCR1 -C BPU_CCCR3 -e 24
+
+ -m <n> : bit 0 count HITS, bit 1 MISSES, bit 2 uncacheable hit
+
+ e.g. all ITLB misses -m 2
+
+Load replays
+============
+
+cpuperf -E MOB_ESCR0 -C BPU_CCCR0 -e 3
+cpuperf -E MOB_ESCR0 -C BPU_CCCR1 -e 3
+cpuperf -E MOB_ESCR1 -C BPU_CCCR2 -e 3
+cpuperf -E MOB_ESCR1 -C BPU_CCCR3 -e 3
+
+ -m <n> : bit mask, replay due to...
+ 1: unknown store address
+ 3: unknown store data
+ 4: partially overlapped data access between LD/ST
+ 5: unaligned address between LD/ST
+
+Page walks
+==========
+
+cpuperf -E PMH_ESCR0 -C BPU_CCCR0 -e 1
+cpuperf -E PMH_ESCR0 -C BPU_CCCR1 -e 1
+cpuperf -E PMH_ESCR1 -C BPU_CCCR2 -e 1
+cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1
+
+ -m <n> : bit 0 counts walks for a D-TLB miss, bit 1 for I-TLB miss
+
+L2/L3 cache accesses
+====================
+
+cpuperf -E BSU_ESCR0 -C BPU_CCCR0 -e 12
+cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12
+cpuperf -E BSU_ESCR1 -C BPU_CCCR2 -e 12
+cpuperf -E BSU_ESCR1 -C BPU_CCCR3 -e 12
+
+ -m <n> : where the bit mask is:
+ 0: Read L2 HITS Shared
+ 1: Read L2 HITS Exclusive
+ 2: Read L2 HITS Modified
+ 3: Read L3 HITS Shared
+ 4: Read L3 HITS Exclusive
+ 5: Read L3 HITS Modified
+ 8: Read L2 MISS
+ 9: Read L3 MISS
+ 10: Write L2 MISS
+
+Front side bus activity
+=======================
+
+cpuperf -E FSB_ESCR0 -C BPU_CCCR0 -e 23 -k -g
+cpuperf -E FSB_ESCR0 -C BPU_CCCR1 -e 23 -k -g
+cpuperf -E FSB_ESCR1 -C BPU_CCCR2 -e 23 -k -g
+cpuperf -E FSB_ESCR1 -C BPU_CCCR3 -e 23 -k -g
+
+ -m <n> : where the bit mask is for bus events:
+ 0: DRDY_DRV Processor drives bus
+ 1: DRDY_OWN Processor reads bus
+ 2: DRDY_OTHER Data on bus not being sampled by processor
+ 3: DBSY_DRV Processor reserves bus for driving
+ 4: DBSY_OWN Other entity reserves bus for sending to processor
+ 5: DBSY_OTHER Other entity reserves bus for sending elsewhere
+
+ e.g. -m 3 to get cycles bus actually in use.
+
+Pipeline clear (entire)
+=======================
+
+cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 2
+cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 2
+cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 2
+cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 2
+cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 2
+cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 2
+
+ -m <n> : bit mask:
+ 0: counts a portion of cycles while clear (use -g for edge trigger)
+ 1: counts each time machine clears for memory ordering issues
+ 2: counts each time machine clears for self modifying code
+
+Instructions retired
+====================
+
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2
+cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 2
+cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 2
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2
+cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 2
+cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 2
+
+ -m <n> : bit mask:
+ 0: counts non-bogus, not tagged instructions
+ 1: counts non-bogus, tagged instructions
+ 2: counts bogus, not tagged instructions
+ 3: counts bogus, tagged instructions
+
+ e.g. -m 3 to count legit retirements
+
+Uops retired
+============
+
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 1
+cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 1
+cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 1
+
+ -m <n> : bit mask:
+ 0: Non-bogus
+ 1: Bogus
+
+x87 FP uops
+===========
+
+cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768
+cpuperf -E FIRM_ESCR0 -C FLAME_CCCR1 -e 4 -m 32768
+cpuperf -E FIRM_ESCR1 -C FLAME_CCCR2 -e 4 -m 32768
+cpuperf -E FIRM_ESCR1 -C FLAME_CCCR3 -e 4 -m 32768
+
+Replay tagging mechanism
+========================
+
+Counts retirement of uops tagged with the replay tagging mechanism
+
+cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9
+cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9
+cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 9
+cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 9
+cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 9
+cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 9
+
+ -m <n> : bit mask:
+ 0: Non-bogus (set this bit for all events listed below)
+ 1: Bogus
+
+Set replay tagging mechanism bits with -P and -V:
+
+ L1 cache load miss retired: -P 0 -P 24 -P 25 -V 0
+ L2 cache load miss retired: -P 1 -P 24 -P 25 -V 0 (read manual)
+ DTLB load miss retired: -P 2 -P 24 -P 25 -V 0
+ DTLB store miss retired: -P 2 -P 24 -P 25 -V 1
+ DTLB all miss retired: -P 2 -P 24 -P 25 -V 0 -V 1
+
+e.g. to count all DTLB misses
+
+ cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9 -m 1 -P 2 -P 24 -P 25 -V 0 -V 1
+
+Front end event
+===============
+
+To count tagged uops:
+
+cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 8
+cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 8
+cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 8
+cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 8
+cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8
+cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 8
+
+ -m <n> : bit 0 for non-bogus uops, bit 1 for bogus uops
+
+Must have another ESCR programmed to tag uops as required
+
+cpuperf -E RAT_ESCR0 -C NONE -e 2
+cpuperf -E RAT_ESCR1 -C NONE -e 2
+
+ -m <n> : bit 1 for LOADs, bit 2 for STOREs
+
+An example set of counters
+===========================
+
+# instructions retired
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3
+
+# trace cache misses
+cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1
+
+# L1 D cache misses (load misses retired)
+cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9 -m 1 -P 0 -P 24 -P 25 -V 0
+
+# L2 misses (load and store)
+cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12 -m 1280
+
+# I-TLB misses
+cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24 -m 2
+
+# D-TLB misses (as PT walks)
+cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1 -m 1
+
+# Other 'bonus' counters would be:
+# number of loads executed - need both command lines
+cpuperf -E RAT_ESCR0 -C NONE -e 2 -m 2
+cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8 -m 3
+
+# number of mispredicted branches
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1
+
+# x87 FP uOps
+cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768
+
+The above has counter assignments
+
+0 Trace cache misses
+1 L2 Misses
+2 I-TLB misses
+3 D-TLB misses
+4
+5
+6
+7
+8 x87 FP uOps
+9
+10
+11
+12 Instructions retired
+13 L1 D cache misses
+14 Mispredicted branches
+15 Loads executed
+16
+17
+
+Counting instructions retired on each logical CPU
+=================================================
+
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3 -t 12
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2 -m 3 -t 3
+
+Cannot count mispred branches as well due to CRU_ESCR1 use.
--- /dev/null
+/*
+ * User mode program to program performance counters.
+ *
+ * JRB/IAP October 2003.
+ *
+ * $Id: cpuperf.c,v 1.2 2003/10/14 11:00:59 jrb44 Exp $
+ *
+ * $Log: cpuperf.c,v $
+ * Revision 1.2 2003/10/14 11:00:59 jrb44
+ * Added default CPU. Added NONE CCCR.
+ *
+ * Revision 1.1 2003/10/13 16:49:44 jrb44
+ * Initial revision
+ *
+ */
+
+#include <sys/types.h>
+#include <sched.h>
+#include <error.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "p4perf.h"
+
+/*
+ * Fallback MSR write: performs no write, only reports the request on
+ * stderr. The #defines of cpus_wrmsr further down redirect later calls
+ * to a real backend when PERFCNTR or XENO is set.
+ */
+static inline void cpus_wrmsr(int cpu_mask, int msr,
+                              unsigned int low, unsigned int high )
+{
+    fprintf(stderr,
+            "No backend to write MSR 0x%x <= 0x%08x%08x on %08x\n",
+            msr, high, low, cpu_mask);
+}
+
+/*
+ * Fallback MSR read: reports on stderr that no backend is available and
+ * yields 0. Replaced by a real backend when PERFCNTR or XENO is set.
+ */
+static inline unsigned long long cpus_rdmsr( int cpu_mask, int msr )
+{
+    fprintf(stderr, "No backend to read MSR 0x%x on %08x\n", msr, cpu_mask);
+
+    return 0ULL;
+}
+
+#ifdef PERFCNTR
+#include "cpuperf_perfcntr.h"
+#define cpus_wrmsr perfcntr_wrmsr
+#define cpus_rdmsr perfcntr_rdmsr
+#endif
+
+#ifdef XENO
+#include "cpuperf_xeno.h"
+#define cpus_wrmsr dom0_wrmsr
+#define cpus_rdmsr dom0_rdmsr
+#endif
+
+struct macros { /* one named P4 performance-monitoring MSR */
+ char *name; /* name matched by lookup_macro() for the -E / -C arguments */
+ unsigned long msr_addr; /* MSR address; NO_CCCR marks the "NONE" entry */
+ int number; /* main() passes this to P4_CCCR_ESCR() for the -E entry and prints it for the -C entry */
+};
+
+#define NO_CCCR 0xfffffffe
+
+struct macros msr[] = {
+ {"BPU_COUNTER0", 0x300, 0},
+ {"BPU_COUNTER1", 0x301, 1},
+ {"BPU_COUNTER2", 0x302, 2},
+ {"BPU_COUNTER3", 0x303, 3},
+ {"MS_COUNTER0", 0x304, 4},
+ {"MS_COUNTER1", 0x305, 5},
+ {"MS_COUNTER2", 0x306, 6},
+ {"MS_COUNTER3", 0x307, 7},
+ {"FLAME_COUNTER0", 0x308, 8},
+ {"FLAME_COUNTER1", 0x309, 9},
+ {"FLAME_COUNTER2", 0x30a, 10},
+ {"FLAME_COUNTER3", 0x30b, 11},
+ {"IQ_COUNTER0", 0x30c, 12},
+ {"IQ_COUNTER1", 0x30d, 13},
+ {"IQ_COUNTER2", 0x30e, 14},
+ {"IQ_COUNTER3", 0x30f, 15},
+ {"IQ_COUNTER4", 0x310, 16},
+ {"IQ_COUNTER5", 0x311, 17},
+ {"BPU_CCCR0", 0x360, 0},
+ {"BPU_CCCR1", 0x361, 1},
+ {"BPU_CCCR2", 0x362, 2},
+ {"BPU_CCCR3", 0x363, 3},
+ {"MS_CCCR0", 0x364, 4},
+ {"MS_CCCR1", 0x365, 5},
+ {"MS_CCCR2", 0x366, 6},
+ {"MS_CCCR3", 0x367, 7},
+ {"FLAME_CCCR0", 0x368, 8},
+ {"FLAME_CCCR1", 0x369, 9},
+ {"FLAME_CCCR2", 0x36a, 10},
+ {"FLAME_CCCR3", 0x36b, 11},
+ {"IQ_CCCR0", 0x36c, 12},
+ {"IQ_CCCR1", 0x36d, 13},
+ {"IQ_CCCR2", 0x36e, 14},
+ {"IQ_CCCR3", 0x36f, 15},
+ {"IQ_CCCR4", 0x370, 16},
+ {"IQ_CCCR5", 0x371, 17},
+ {"BSU_ESCR0", 0x3a0, 7},
+ {"BSU_ESCR1", 0x3a1, 7},
+ {"FSB_ESCR0", 0x3a2, 6},
+ {"FSB_ESCR1", 0x3a3, 6},
+ {"MOB_ESCR0", 0x3aa, 2},
+ {"MOB_ESCR1", 0x3ab, 2},
+ {"PMH_ESCR0", 0x3ac, 4},
+ {"PMH_ESCR1", 0x3ad, 4},
+ {"BPU_ESCR0", 0x3b2, 0},
+ {"BPU_ESCR1", 0x3b3, 0},
+ {"IS_ESCR0", 0x3b4, 1},
+ {"IS_ESCR1", 0x3b5, 1},
+ {"ITLB_ESCR0", 0x3b6, 3},
+ {"ITLB_ESCR1", 0x3b7, 3},
+ {"IX_ESCR0", 0x3c8, 5},
+ {"IX_ESCR1", 0x3c9, 5},
+ {"MS_ESCR0", 0x3c0, 0},
+ {"MS_ESCR1", 0x3c1, 0},
+ {"TBPU_ESCR0", 0x3c2, 2},
+ {"TBPU_ESCR1", 0x3c3, 2},
+ {"TC_ESCR0", 0x3c4, 1},
+ {"TC_ESCR1", 0x3c5, 1},
+ {"FIRM_ESCR0", 0x3a4, 1},
+ {"FIRM_ESCR1", 0x3a5, 1},
+ {"FLAME_ESCR0", 0x3a6, 0},
+ {"FLAME_ESCR1", 0x3a7, 0},
+ {"DAC_ESCR0", 0x3a8, 5},
+ {"DAC_ESCR1", 0x3a9, 5},
+ {"SAAT_ESCR0", 0x3ae, 2},
+ {"SAAT_ESCR1", 0x3af, 2},
+ {"U2L_ESCR0", 0x3b0, 3},
+ {"U2L_ESCR1", 0x3b1, 3},
+ {"CRU_ESCR0", 0x3b8, 4},
+ {"CRU_ESCR1", 0x3b9, 4},
+ {"CRU_ESCR2", 0x3cc, 5},
+ {"CRU_ESCR3", 0x3cd, 5},
+ {"CRU_ESCR4", 0x3e0, 6},
+ {"CRU_ESCR5", 0x3e1, 6},
+ {"IQ_ESCR0", 0x3ba, 0},
+ {"IQ_ESCR1", 0x3bb, 0},
+ {"RAT_ESCR0", 0x3bc, 2},
+ {"RAT_ESCR1", 0x3bd, 2},
+ {"SSU_ESCR0", 0x3be, 3},
+ {"SSU_ESCR1", 0x3bf, 3},
+ {"ALF_ESCR0", 0x3ca, 1},
+ {"ALF_ESCR1", 0x3cb, 1},
+ {"PEBS_ENABLE", 0x3f1, 0},
+ {"PEBS_MATRIX_VERT", 0x3f2, 0},
+ {"NONE", NO_CCCR, 0},
+ {NULL, 0, 0}
+};
+
+/*
+ * Find the msr[] entry whose name equals str (exact, case-sensitive).
+ *
+ * Returns a pointer into the table, or NULL when nothing matches.
+ */
+struct macros *lookup_macro(char *str)
+{
+    struct macros *entry;
+
+    /* The table ends with an entry whose name is NULL. */
+    for (entry = msr; entry->name != NULL; entry++) {
+        if (!strcmp(entry->name, str))
+            return entry;
+    }
+
+    return NULL;
+}
+
+/*
+ * Command-line front end.
+ *
+ * With -r, prints MSRs 0x300-0x311 and exits with status 0.
+ * Otherwise builds an ESCR value and a CCCR value from the options and
+ * writes them (plus PEBS_ENABLE / PEBS_MATRIX_VERT when -P / -V were
+ * given) through cpus_wrmsr(). Only the low 32 bits of each MSR are
+ * supplied; the high word is always written as 0.
+ *
+ * Exits with status 1 on a bad option, an out-of-range bit or CPU
+ * number, an unknown ESCR/CCCR name, or a missing -E / -C.
+ */
+int main(int argc, char **argv)
+{
+    int c, t = 0xc, es = 0, em = 0, tv = 0, te = 0;
+    unsigned int cpu_mask = 1;
+    struct macros *escr = NULL, *cccr = NULL;
+    unsigned long escr_val, cccr_val;
+    int debug = 0;
+    unsigned long pebs = 0, pebs_vert = 0;
+    int pebs_x = 0, pebs_vert_x = 0;
+    int read = 0;
+    int compare = 0;
+    int complement = 0;
+    int edge = 0;
+    int bit;
+
+#ifdef XENO
+    xen_init();
+#endif
+
+
+    while ((c = getopt(argc, argv, "dc:t:e:m:T:E:C:P:V:rkng")) != -1) {
+        switch((char)c) {
+        case 'P':
+        case 'V':
+            bit = atoi(optarg);
+            /* Shifting by a negative or too-large count is undefined,
+             * and only the low 32 bits are ever written to the MSR. */
+            if (bit < 0 || bit > 31) {
+                fprintf(stderr, "Bit number '%s' out of range (0-31).\n",
+                        optarg);
+                exit(1);
+            }
+            if (c == 'P') {
+                pebs |= 1UL << bit;
+                pebs_x = 1;
+            } else {
+                pebs_vert |= 1UL << bit;
+                pebs_vert_x = 1;
+            }
+            break;
+        case 'd':
+            debug = 1;
+            break;
+        case 'c':
+            {
+                int cpu = atoi(optarg);
+                /* -1 selects every CPU; cpu_mask has room for 32 bits. */
+                if (cpu < -1 || cpu > 31) {
+                    fprintf(stderr, "CPU number '%s' out of range (-1 to 31).\n",
+                            optarg);
+                    exit(1);
+                }
+                cpu_mask = (cpu == -1)?(~0U):(1U<<cpu);
+            }
+            break;
+        case 't': // ESCR thread bits
+            t = atoi(optarg);
+            break;
+        case 'e': // eventsel
+            es = atoi(optarg);
+            break;
+        case 'm': // eventmask
+            em = atoi(optarg);
+            break;
+        case 'T': // tag value
+            tv = atoi(optarg);
+            te = 1;
+            break;
+        case 'E':
+            escr = lookup_macro(optarg);
+            if (!escr) {
+                fprintf(stderr, "Macro '%s' not found.\n", optarg);
+                exit(1);
+            }
+            break;
+        case 'C':
+            cccr = lookup_macro(optarg);
+            if (!cccr) {
+                fprintf(stderr, "Macro '%s' not found.\n", optarg);
+                exit(1);
+            }
+            break;
+        case 'r':
+            read = 1;
+            break;
+        case 'k':
+            compare = 1;
+            break;
+        case 'n':
+            complement = 1;
+            break;
+        case 'g':
+            edge = 1;
+            break;
+        default:
+            /* getopt() has already printed a diagnostic; do not go on
+             * to program MSRs from a half-understood command line. */
+            exit(1);
+        }
+    }
+
+    if (read) {
+        /* NOTE(review): the loop runs only while bit 0 of the mask is
+         * set, so "-c N" with N > 0 prints nothing - confirm whether
+         * that is intended before changing it. */
+        while((cpu_mask&1)) {
+            int i;
+            for (i=0x300;i<0x312;i++) {
+                printf("%010llx ",cpus_rdmsr( cpu_mask, i ) );
+            }
+            printf("\n");
+            cpu_mask>>=1;
+        }
+        /* Reading the counters succeeded: report success to the shell. */
+        exit(0);
+    }
+
+    if (!escr) {
+        fprintf(stderr, "Need an ESCR.\n");
+        exit(1);
+    }
+    if (!cccr) {
+        fprintf(stderr, "Need a counter number.\n");
+        exit(1);
+    }
+
+    escr_val = P4_ESCR_THREADS(t) | P4_ESCR_EVNTSEL(es) |
+        P4_ESCR_EVNTMASK(em) | P4_ESCR_TV(tv) | ((te)?P4_ESCR_TE:0);
+    cccr_val = P4_CCCR_ENABLE | P4_CCCR_ESCR(escr->number) |
+        ((compare)?P4_CCCR_COMPARE:0) |
+        ((complement)?P4_CCCR_COMPLEMENT:0) |
+        ((edge)?P4_CCCR_EDGE:0) |
+        P4_CCCR_ACTIVE_THREAD(3)/*reserved*/;
+
+    if (debug) {
+        fprintf(stderr, "ESCR 0x%lx <= 0x%08lx\n", escr->msr_addr, escr_val);
+        if (cccr->msr_addr != NO_CCCR)
+            fprintf(stderr, "CCCR 0x%lx <= 0x%08lx (%u)\n",
+                    cccr->msr_addr, cccr_val, cccr->number);
+        if (pebs_x)
+            fprintf(stderr, "PEBS 0x%x <= 0x%08lx\n",
+                    MSR_P4_PEBS_ENABLE, pebs);
+        if (pebs_vert_x)
+            fprintf(stderr, "PMV 0x%x <= 0x%08lx\n",
+                    MSR_P4_PEBS_MATRIX_VERT, pebs_vert);
+    }
+
+    cpus_wrmsr( cpu_mask, escr->msr_addr, escr_val, 0 );
+    /* "-C NONE" programs the ESCR only. */
+    if (cccr->msr_addr != NO_CCCR)
+        cpus_wrmsr( cpu_mask, cccr->msr_addr, cccr_val, 0 );
+
+    if (pebs_x)
+        cpus_wrmsr( cpu_mask, MSR_P4_PEBS_ENABLE, pebs, 0 );
+
+    if (pebs_vert_x)
+        cpus_wrmsr( cpu_mask, MSR_P4_PEBS_MATRIX_VERT, pebs_vert, 0 );
+
+    return 0;
+}
+
+// End of $RCSfile: cpuperf.c,v $
+
--- /dev/null
+/*
+ * Interface to JRB44's /proc/perfcntr interface.
+ *
+ * $Id: cpuperf_perfcntr.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
+ *
+ * $Log: cpuperf_perfcntr.h,v $
+ * Revision 1.1 2003/10/13 16:49:44 jrb44
+ * Initial revision
+ *
+ */
+
+#define PROC_PERFCNTR "/proc/perfcntr"
+
+/*
+ * Ask the perfcntr kernel module to write an MSR.
+ *
+ * Writes the line "<cpu_mask> <msr> <value> " (all hexadecimal, value =
+ * high:low) to PROC_PERFCNTR and echoes the same line on stderr.
+ * Exits with status 1 if the file cannot be opened or the write fails.
+ */
+static inline void perfcntr_wrmsr(int cpu_mask,
+                                  int msr,
+                                  unsigned int low,
+                                  unsigned int high )
+{
+    FILE *fd;
+    unsigned long long value = low | (((unsigned long long)high) << 32);
+    int failed;
+
+    fd = fopen(PROC_PERFCNTR, "w");
+    if (fd == NULL)
+    {
+        perror("open " PROC_PERFCNTR);
+        exit(1);
+    }
+
+    failed = (fprintf(fd, "%x %x %llx \n", cpu_mask, msr, value) < 0);
+    fprintf(stderr, "%x %x %llx \n", cpu_mask, msr, value);
+
+    /* stdio buffers the line, so a rejected write is usually only
+     * reported when the stream is flushed by fclose(). */
+    if (fclose(fd) != 0)
+        failed = 1;
+
+    if (failed)
+    {
+        perror("write " PROC_PERFCNTR);
+        exit(1);
+    }
+}
+
+/*
+ * Placeholder MSR read for the perfcntr backend: prints a warning on
+ * stderr and yields 0; both arguments are ignored.
+ */
+static inline unsigned long long perfcntr_rdmsr( int cpu_mask, int msr )
+{
+    fprintf(stderr, "WARNING: rdmsr not yet implemented for perfcntr.\n");
+
+    return 0ULL;
+}
+
+// End of $RCSfile: cpuperf_perfcntr.h,v $
+
--- /dev/null
+/*
+ * Interface to Xen MSR hypercalls.
+ *
+ * $Id: cpuperf_xeno.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
+ *
+ * $Log: cpuperf_xeno.h,v $
+ * Revision 1.1 2003/10/13 16:49:44 jrb44
+ * Initial revision
+ *
+ */
+
+#include <xc.h>
+
+static int xc_handle;
+
+/*
+ * Open the xc interface and keep the handle in the file-scope xc_handle.
+ * On failure prints errno and its message on stderr, then exits with -1.
+ */
+void xen_init()
+{
+    xc_handle = xc_interface_open();
+    if ( xc_handle != -1 )
+        return;
+
+    fprintf(stderr, "Error opening xc interface: %d (%s)\n",
+            errno, strerror(errno));
+    exit(-1);
+}
+
+/*
+ * Xen backend for cpus_wrmsr(): forwards the write to xc_msr_write()
+ * using the handle opened by xen_init(). A non-zero return from
+ * xc_msr_write() is reported on stderr instead of being dropped.
+ */
+void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high )
+{
+    int rc = xc_msr_write (xc_handle, cpu_mask, msr, low, high);
+
+    if ( rc != 0 )
+        fprintf(stderr, "Error writing MSR 0x%x on %08x: rc=%d\n",
+                msr, cpu_mask, rc);
+}
+
+/*
+ * Xen backend for cpus_rdmsr(): returns the value produced by
+ * xc_msr_read() for the handle opened by xen_init().
+ */
+unsigned long long dom0_rdmsr( int cpu_mask, int msr )
+{
+    unsigned long long value = xc_msr_read(xc_handle, cpu_mask, msr);
+
+    return value;
+}
+
+// End of $RCSfile: cpuperf_xeno.h,v $
+
--- /dev/null
+#############################################################################
+# (C) 2005 - Rolf Neugebauer - Intel Research Cambridge
+#############################################################################
+#
+# File: Makefile
+# Author: Rolf Neugebauer (rolf.neugebauer@intel.com)
+# Date: Mar 2005
+#
+# Environment:
+#
+
+# invoke:
+# make -C /lib/modules/`uname -r`/build SUBDIRS=`pwd` modules_install
+
+obj-m := perfcntr.o
+
--- /dev/null
+/*
+ * Linux loadable kernel module to use P4 performance counters.
+ *
+ * James Bulpin, Feb 2003.
+ *
+ * $Id$
+ *
+ * $Log$
+ */
+
+#define DRV_NAME "perfcntr"
+#define DRV_VERSION "0.2"
+#define DRV_RELDATE "02 Jun 2004"
+
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#define NOHT
+
+#include "../p4perf.h"
+
+#ifdef NOHT
+# define CPUMASK 0x00000003
+#else
+# define CPUMASK 0x00000005
+#endif
+
+/*****************************************************************************
+ * Module admin *
+ *****************************************************************************/
+
+MODULE_AUTHOR("James Bulpin <James.Bulpin@cl.cam.ac.uk>");
+MODULE_DESCRIPTION("P4 Performance Counters access "
+ DRV_VERSION " " DRV_RELDATE);
+MODULE_LICENSE("GPL");
+
+static char version[] __devinitdata =
+DRV_NAME ": James Bulpin.\n";
+
+static unsigned char foobar[4];
+
+/* rpcc: get full 64-bit Pentium TSC value
+ */
+static __inline__ unsigned long long int rpcc(void)
+{
+    unsigned int lo, hi;
+
+    /* The constraints bind the low half to EAX and the high half to EDX. */
+    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+
+    /* The shifted high half has zero low bits, so OR equals addition. */
+    return ((unsigned long long)hi << 32) | lo;
+}
+
+/*****************************************************************************
+ * Display the counters *
+ *****************************************************************************/
+
+//#define processor cpu // post 2.4.16
+
+typedef union { /* one counter value, addressable whole or as two halves */
+ struct {
+ unsigned long lo; /* low word, as stored by READ_COUNTER() */
+ unsigned long hi; /* high word, as stored by READ_COUNTER() */
+ };
+ unsigned long long cnt; /* NOTE(review): overlays lo/hi exactly only where unsigned long is 32 bits - confirm target */
+} cpu_perfcntr_t;
+
+typedef struct counters_t_struct { /* snapshot filled in on one CPU by show_perfcntr_for() */
+ int processor; /* smp_processor_id() of the CPU that filled this slot */
+ unsigned long long tsc; /* rpcc() value taken just before the counters were read */
+ cpu_perfcntr_t counters[18]; /* slots 0-17, one per READ_COUNTER() call */
+} counters_t;
+
+typedef struct perfcntr_t_struct {
+ unsigned long cpu_mask; /* bit n set => CPU n records into cpus[n] */
+ counters_t cpus[4]; // Actually for each cpu in system
+} perfcntr_t;
+
+#ifdef HUMAN_READABLE
+# define SHOW_COUNTER(c) rdmsr (c, l, h);\
+ seq_printf(m, "0x%03x: 0x%08x%08x\n", c, h, l)
+#else
+# define SHOW_COUNTER(c) rdmsr (c, l, h);\
+ seq_printf(m, " %llu", \
+ (unsigned long long)h << 32 | (unsigned long long)l)
+#endif
+
+#if 0
+static unsigned long last_l = 0, last_h = 0, last_msr = 0;
+static int last_cpu = 0;
+#endif
+
+#define READ_COUNTER(_i, _msr) rdmsr((_msr), l, h); c->counters[_i].lo = l; \
+ c->counters[_i].hi = h;
+
+static perfcntr_t perfcntrs;
+
+static void show_perfcntr_for(void *v) /* snapshot the calling CPU's TSC and 18 counter MSRs into perfcntrs; v is unused */
+{
+ unsigned int l, h; /* scratch halves filled by rdmsr inside READ_COUNTER() */
+
+ perfcntr_t *p = &perfcntrs;
+ counters_t *c;
+
+ if (!((1 << smp_processor_id()) & p->cpu_mask)) /* CPUs not selected in cpu_mask record nothing */
+ return;
+
+ c = &p->cpus[smp_processor_id()]; /* slot is indexed by CPU id; cpus[] has 4 entries */
+
+ c->processor = smp_processor_id();
+ c->tsc = rpcc();
+
+ READ_COUNTER(0, MSR_P4_BPU_COUNTER0); /* READ_COUNTER(i, msr): rdmsr(msr) into c->counters[i].lo/.hi */
+ READ_COUNTER(1, MSR_P4_BPU_COUNTER1);
+ READ_COUNTER(2, MSR_P4_BPU_COUNTER2);
+ READ_COUNTER(3, MSR_P4_BPU_COUNTER3);
+
+ READ_COUNTER(4, MSR_P4_MS_COUNTER0);
+ READ_COUNTER(5, MSR_P4_MS_COUNTER1);
+ READ_COUNTER(6, MSR_P4_MS_COUNTER2);
+ READ_COUNTER(7, MSR_P4_MS_COUNTER3);
+
+ READ_COUNTER(8, MSR_P4_FLAME_COUNTER0);
+ READ_COUNTER(9, MSR_P4_FLAME_COUNTER1);
+ READ_COUNTER(10, MSR_P4_FLAME_COUNTER2);
+ READ_COUNTER(11, MSR_P4_FLAME_COUNTER3);
+
+ READ_COUNTER(12, MSR_P4_IQ_COUNTER0);
+ READ_COUNTER(13, MSR_P4_IQ_COUNTER1);
+ READ_COUNTER(14, MSR_P4_IQ_COUNTER2);
+ READ_COUNTER(15, MSR_P4_IQ_COUNTER3);
+ READ_COUNTER(16, MSR_P4_IQ_COUNTER4);
+ READ_COUNTER(17, MSR_P4_IQ_COUNTER5);
+
+ return;
+}
+
+/*
+ * seq_file show routine: take a counter snapshot on every CPU selected
+ * by CPUMASK and print one line per selected CPU:
+ *
+ *   <processor> <tsc> <counter0> ... <counter17>
+ *
+ * m: seq_file to print into; v is unused. Always returns 0.
+ */
+static int show_perfcntr(struct seq_file *m, void *v)
+{
+    int i, j;
+    /* Never index past the cpus[] array, whatever CPUMASK contains. */
+    const int nr_slots = sizeof(perfcntrs.cpus) / sizeof(perfcntrs.cpus[0]);
+
+    // Get each physical cpu to read counters
+    perfcntrs.cpu_mask = CPUMASK;
+
+    /* Remote CPUs first, then the CPU running this code. */
+    smp_call_function(show_perfcntr_for, NULL, 1, 1);
+    show_perfcntr_for(NULL);
+
+    for (i = 0; i < nr_slots; i++) {
+        counters_t *c;
+
+        if (!((1UL << i) & perfcntrs.cpu_mask))
+            continue;
+
+        c = &perfcntrs.cpus[i];
+        seq_printf(m, "%u %llu", c->processor, c->tsc);
+        for (j = 0; j < 18; j++) {
+            seq_printf(m, " %llu", c->counters[j].cnt);
+        }
+        seq_printf(m, "\n");
+    }
+
+    return 0;
+}
+
+/*****************************************************************************
+ * Show counter configuration *
+ *****************************************************************************/
+
+typedef union { /* one configuration MSR value, addressable whole or as two halves */
+ struct {
+ unsigned long lo; /* low word, as stored by READ_CCCR() / READ_ESCR() */
+ unsigned long hi; /* high word, as stored by READ_CCCR() / READ_ESCR() */
+ };
+ unsigned long long cnt; /* NOTE(review): overlays lo/hi exactly only where unsigned long is 32 bits - confirm target */
+} cpu_perfcfg_t;
+
+typedef struct configs_t_struct { /* snapshot filled in on one CPU by show_perfcfg_for() */
+ int processor; /* smp_processor_id() of the CPU that filled this slot */
+ unsigned long long tsc; /* rpcc() value taken just before the registers were read */
+ cpu_perfcfg_t cccr[18]; /* slots 0-17, one per READ_CCCR() call */
+ cpu_perfcfg_t escr[0x42]; /* sparse: only the slots named in READ_ESCR() calls are written */
+} configs_t;
+
+typedef struct perfcfg_t_struct {
+ unsigned long cpu_mask; /* bit n set => CPU n records into cpus[n] */
+ configs_t cpus[4]; // Actually for each cpu in system
+} perfcfg_t;
+
+static perfcfg_t perfcfgs;
+
+#define READ_CCCR(_i, _msr) rdmsr((_msr), l, h); c->cccr[_i].lo = l; \
+ c->cccr[_i].hi = h;
+#define READ_ESCR(_i, _msr) rdmsr((_msr), l, h); c->escr[_i].lo = l; \
+ c->escr[_i].hi = h;
+
+static void show_perfcfg_for(void *v) /* snapshot the calling CPU's TSC, 18 CCCRs and the listed ESCRs into perfcfgs; v is unused */
+{
+ unsigned int l, h; /* scratch halves filled by rdmsr inside READ_CCCR() / READ_ESCR() */
+
+ perfcfg_t *p = &perfcfgs;
+ configs_t *c;
+
+ if (!((1 << smp_processor_id()) & p->cpu_mask)) /* CPUs not selected in cpu_mask record nothing */
+ return;
+
+ c = &p->cpus[smp_processor_id()]; /* slot is indexed by CPU id; cpus[] has 4 entries */
+
+ c->processor = smp_processor_id();
+ c->tsc = rpcc();
+
+ READ_CCCR(0, MSR_P4_BPU_CCCR0); /* READ_CCCR(i, msr): rdmsr(msr) into c->cccr[i].lo/.hi */
+ READ_CCCR(1, MSR_P4_BPU_CCCR1);
+ READ_CCCR(2, MSR_P4_BPU_CCCR2);
+ READ_CCCR(3, MSR_P4_BPU_CCCR3);
+
+ READ_CCCR(4, MSR_P4_MS_CCCR0);
+ READ_CCCR(5, MSR_P4_MS_CCCR1);
+ READ_CCCR(6, MSR_P4_MS_CCCR2);
+ READ_CCCR(7, MSR_P4_MS_CCCR3);
+
+ READ_CCCR(8, MSR_P4_FLAME_CCCR0);
+ READ_CCCR(9, MSR_P4_FLAME_CCCR1);
+ READ_CCCR(10, MSR_P4_FLAME_CCCR2);
+ READ_CCCR(11, MSR_P4_FLAME_CCCR3);
+
+ READ_CCCR(12, MSR_P4_IQ_CCCR0);
+ READ_CCCR(13, MSR_P4_IQ_CCCR1);
+ READ_CCCR(14, MSR_P4_IQ_CCCR2);
+ READ_CCCR(15, MSR_P4_IQ_CCCR3);
+ READ_CCCR(16, MSR_P4_IQ_CCCR4);
+ READ_CCCR(17, MSR_P4_IQ_CCCR5);
+
+ READ_ESCR(0x00, MSR_P4_BSU_ESCR0); /* READ_ESCR(i, msr): rdmsr(msr) into c->escr[i]; slot numbers are not contiguous */
+ READ_ESCR(0x02, MSR_P4_FSB_ESCR0);
+ READ_ESCR(0x0a, MSR_P4_MOB_ESCR0);
+ READ_ESCR(0x0c, MSR_P4_PMH_ESCR0);
+ READ_ESCR(0x12, MSR_P4_BPU_ESCR0);
+ READ_ESCR(0x14, MSR_P4_IS_ESCR0);
+ READ_ESCR(0x16, MSR_P4_ITLB_ESCR0);
+ READ_ESCR(0x28, MSR_P4_IX_ESCR0);
+ READ_ESCR(0x01, MSR_P4_BSU_ESCR1);
+ READ_ESCR(0x03, MSR_P4_FSB_ESCR1);
+ READ_ESCR(0x0b, MSR_P4_MOB_ESCR1);
+ READ_ESCR(0x0d, MSR_P4_PMH_ESCR1);
+ READ_ESCR(0x13, MSR_P4_BPU_ESCR1);
+ READ_ESCR(0x15, MSR_P4_IS_ESCR1);
+ READ_ESCR(0x17, MSR_P4_ITLB_ESCR1);
+ READ_ESCR(0x29, MSR_P4_IX_ESCR1);
+ READ_ESCR(0x20, MSR_P4_MS_ESCR0);
+ READ_ESCR(0x22, MSR_P4_TBPU_ESCR0);
+ READ_ESCR(0x24, MSR_P4_TC_ESCR0);
+ READ_ESCR(0x21, MSR_P4_MS_ESCR1);
+ READ_ESCR(0x23, MSR_P4_TBPU_ESCR1);
+ READ_ESCR(0x25, MSR_P4_TC_ESCR1);
+ READ_ESCR(0x04, MSR_P4_FIRM_ESCR0);
+ READ_ESCR(0x06, MSR_P4_FLAME_ESCR0);
+ READ_ESCR(0x08, MSR_P4_DAC_ESCR0);
+ READ_ESCR(0x0e, MSR_P4_SAAT_ESCR0);
+ READ_ESCR(0x10, MSR_P4_U2L_ESCR0);
+ READ_ESCR(0x05, MSR_P4_FIRM_ESCR1);
+ READ_ESCR(0x07, MSR_P4_FLAME_ESCR1);
+ READ_ESCR(0x09, MSR_P4_DAC_ESCR1);
+ READ_ESCR(0x0f, MSR_P4_SAAT_ESCR1);
+ READ_ESCR(0x11, MSR_P4_U2L_ESCR1);
+ READ_ESCR(0x18, MSR_P4_CRU_ESCR0);
+ READ_ESCR(0x2c, MSR_P4_CRU_ESCR2);
+ READ_ESCR(0x40, MSR_P4_CRU_ESCR4);
+ READ_ESCR(0x1a, MSR_P4_IQ_ESCR0);
+ READ_ESCR(0x1c, MSR_P4_RAT_ESCR0);
+ READ_ESCR(0x1e, MSR_P4_SSU_ESCR0); /* slot 0x1f is never written; there is no matching SSU_ESCR1 read here */
+ READ_ESCR(0x2a, MSR_P4_ALF_ESCR0);
+ READ_ESCR(0x19, MSR_P4_CRU_ESCR1);
+ READ_ESCR(0x2d, MSR_P4_CRU_ESCR3);
+ READ_ESCR(0x41, MSR_P4_CRU_ESCR5);
+ READ_ESCR(0x1b, MSR_P4_IQ_ESCR1);
+ READ_ESCR(0x1d, MSR_P4_RAT_ESCR1);
+ READ_ESCR(0x2b, MSR_P4_ALF_ESCR1);
+
+ return;
+}
+
+static char *escr_names[] = {
+ "BSU_ESCR0",
+ "BSU_ESCR1",
+ "FSB_ESCR0",
+ "FSB_ESCR1",
+ "FIRM_ESCR0",
+ "FIRM_ESCR1",
+ "FLAME_ESCR0",
+ "FLAME_ESCR1",
+ "DAC_ESCR0",
+ "DAC_ESCR1",
+ "MOB_ESCR0",
+ "MOB_ESCR1",
+ "PMH_ESCR0",
+ "PMH_ESCR1",
+ "SAAT_ESCR0",
+ "SAAT_ESCR1",
+ "U2L_ESCR0",
+ "U2L_ESCR1",
+ "BPU_ESCR0",
+ "BPU_ESCR1",
+ "IS_ESCR0",
+ "IS_ESCR1",
+ "ITLB_ESCR0",
+ "ITLB_ESCR1",
+ "CRU_ESCR0",
+ "CRU_ESCR1",
+ "IQ_ESCR0",
+ "IQ_ESCR1",
+ "RAT_ESCR0",
+ "RAT_ESCR1",
+ "SSU_ESCR0",
+ "SSU_ESCR1",
+ "MS_ESCR0",
+ "MS_ESCR1",
+ "TBPU_ESCR0",
+ "TBPU_ESCR1",
+ "TC_ESCR0",
+ "TC_ESCR1",
+ "0x3c6",
+ "0x3c7",
+ "IX_ESCR0",
+ "IX_ESCR1",
+ "ALF_ESCR0",
+ "ALF_ESCR1",
+ "CRU_ESCR2",
+ "CRU_ESCR3",
+ "0x3ce",
+ "0x3cf",
+ "0x3d0",
+ "0x3d1",
+ "0x3d2",
+ "0x3d3",
+ "0x3d4",
+ "0x3d5",
+ "0x3d6",
+ "0x3d7",
+ "0x3d8",
+ "0x3d9",
+ "0x3da",
+ "0x3db",
+ "0x3dc",
+ "0x3dd",
+ "0x3de",
+ "0x3df",
+ "CRU_ESCR4",
+ "CRU_ESCR5"
+};
+
+static unsigned long escr_map_0[] = /* each escr_map_N row has 8 entries; 0 marks an unused column */
+{MSR_P4_BPU_ESCR0, MSR_P4_IS_ESCR0,
+ MSR_P4_MOB_ESCR0, MSR_P4_ITLB_ESCR0,
+ MSR_P4_PMH_ESCR0, MSR_P4_IX_ESCR0,
+ MSR_P4_FSB_ESCR0, MSR_P4_BSU_ESCR0}; //BPU even
+static unsigned long escr_map_1[] =
+ {MSR_P4_BPU_ESCR1, MSR_P4_IS_ESCR1,
+ MSR_P4_MOB_ESCR1, MSR_P4_ITLB_ESCR1,
+ MSR_P4_PMH_ESCR1, MSR_P4_IX_ESCR1,
+ MSR_P4_FSB_ESCR1, MSR_P4_BSU_ESCR1}; //BPU odd
+static unsigned long escr_map_2[] =
+ {MSR_P4_MS_ESCR0, MSR_P4_TC_ESCR0, MSR_P4_TBPU_ESCR0,
+ 0, 0, 0, 0, 0}; //MS even
+static unsigned long escr_map_3[] =
+ {MSR_P4_MS_ESCR1, MSR_P4_TC_ESCR1, MSR_P4_TBPU_ESCR1,
+ 0, 0, 0, 0, 0}; //MS odd
+static unsigned long escr_map_4[] =
+ {MSR_P4_FLAME_ESCR0, MSR_P4_FIRM_ESCR0, MSR_P4_SAAT_ESCR0,
+ MSR_P4_U2L_ESCR0, 0, MSR_P4_DAC_ESCR0, 0, 0}; //FLAME even
+static unsigned long escr_map_5[] =
+ {MSR_P4_FLAME_ESCR1, MSR_P4_FIRM_ESCR1, MSR_P4_SAAT_ESCR1,
+ MSR_P4_U2L_ESCR1, 0, MSR_P4_DAC_ESCR1, 0, 0}; //FLAME odd
+static unsigned long escr_map_6[] =
+ {MSR_P4_IQ_ESCR0, MSR_P4_ALF_ESCR0,
+ MSR_P4_RAT_ESCR0, MSR_P4_SSU_ESCR0,
+ MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR4, 0}; //IQ even
+static unsigned long escr_map_7[] =
+ {MSR_P4_IQ_ESCR1, MSR_P4_ALF_ESCR1,
+ MSR_P4_RAT_ESCR1, 0,
+ MSR_P4_CRU_ESCR1, MSR_P4_CRU_ESCR3, MSR_P4_CRU_ESCR5, 0}; //IQ odd
+
+static unsigned long *escr_map[] = { /* row order must match the index values chosen in get_escr_msr() */
+ escr_map_0,
+ escr_map_1,
+ escr_map_2,
+ escr_map_3,
+ escr_map_4,
+ escr_map_5,
+ escr_map_6,
+ escr_map_7,
+};
+
+/*
+ * Translate a counter number plus an ESCR select value into the MSR address
+ * of the selected ESCR.
+ *
+ *   c - counter number, one of the P4_*_COUNTERn_NUMBER values
+ *   e - ESCR select number, 0..7 (slot within the per-group table)
+ *
+ * Returns the MSR address, or 0 when the counter number is not recognised,
+ * the select value is out of range, or the slot has no ESCR assigned.
+ */
+unsigned long get_escr_msr(int c, int e)
+{
+    int index = -1;
+
+    // Each escr_map_N table has exactly 8 slots; refuse anything else
+    // instead of reading past the end of the table.
+    if (e < 0 || e > 7)
+        return 0;
+
+    // Get the ESCR MSR address from the counter number and the ESCR number.
+    switch (c) {
+    case P4_BPU_COUNTER0_NUMBER:
+    case P4_BPU_COUNTER1_NUMBER:
+        index = 0;
+        break;
+    case P4_BPU_COUNTER2_NUMBER:
+    case P4_BPU_COUNTER3_NUMBER:
+        index = 1;
+        break;
+    case P4_MS_COUNTER0_NUMBER:
+    case P4_MS_COUNTER1_NUMBER:
+        index = 2; // probably !
+        break;
+    case P4_MS_COUNTER2_NUMBER:
+    case P4_MS_COUNTER3_NUMBER:
+        index = 3; // probably !
+        break;
+    case P4_FLAME_COUNTER0_NUMBER:
+    case P4_FLAME_COUNTER1_NUMBER:
+        index = 4; // probably !
+        break;
+    case P4_FLAME_COUNTER2_NUMBER:
+    case P4_FLAME_COUNTER3_NUMBER:
+        index = 5; // probably !
+        break;
+    case P4_IQ_COUNTER0_NUMBER:
+    case P4_IQ_COUNTER1_NUMBER:
+    case P4_IQ_COUNTER4_NUMBER:
+        index = 6;
+        break;
+    case P4_IQ_COUNTER2_NUMBER:
+    case P4_IQ_COUNTER3_NUMBER:
+    case P4_IQ_COUNTER5_NUMBER:
+        index = 7;
+        break;
+    }
+
+    // Unknown counter number: no table applies.
+    if (index == -1)
+        return 0;
+
+    return escr_map[index][e];
+}
+
+/* Returned by get_escr() when no ESCR name applies. */
+static char null_string[] = "";
+
+/*
+ * Look up the printable name of the ESCR selected by counter c and ESCR
+ * select number e.  Names are taken from escr_names[], indexed by the MSR
+ * address relative to 0x3a0; addresses outside 0x3a0..0x3e1 yield "".
+ */
+static char *get_escr(int c, int e)
+{
+    const unsigned long first_escr = 0x3a0;
+    const unsigned long last_escr = 0x3e1;
+    unsigned long addr = get_escr_msr(c, e);
+
+    if (addr < first_escr || addr > last_escr)
+        return null_string;
+
+    return escr_names[(int)(addr - first_escr)];
+}
+
+/*
+ * seq_file "show" callback for the configuration /proc entry.
+ *
+ * Runs show_perfcfg_for() on the other CPUs and on this one, then for every
+ * CPU whose bit is set in CPUMASK prints the processor id and TSC followed
+ * by one line per CCCR (18 of them): the raw CCCR value and, when the
+ * counter is enabled, the selected ESCR (name, raw value, event select,
+ * event mask, thread flags) and the decoded CCCR flag bits.
+ *
+ * Always returns 0.
+ */
+static int show_perfcfg(struct seq_file *m, void *v)
+{
+    int i, j;
+
+    // Get each physical cpu to read configs
+    perfcfgs.cpu_mask = CPUMASK;
+
+    smp_call_function(show_perfcfg_for, NULL, 1, 1);
+    show_perfcfg_for(NULL);
+
+    for (i = 0; i < 32; i++) {
+        // NOTE(review): the mask is assigned (not compared) on every pass,
+        // exactly as before; presumably show_perfcfg_for() modifies it --
+        // confirm.  1U keeps the shift defined for i == 31.
+        if (((1U << i) & (perfcfgs.cpu_mask = CPUMASK))) {
+            configs_t *c = &perfcfgs.cpus[i];
+
+            seq_printf(m, "----------------------------------------\n");
+            seq_printf(m, "%u %llu\n", c->processor, c->tsc);
+            for (j = 0; j < 18; j++) {
+                unsigned long lo = c->cccr[j].lo;
+                // ESCR select field lives in CCCR bits 13..15.
+                int sel = (int)((lo >> 13) & 7);
+
+                seq_printf(m, "%08lx", lo);
+
+                if (!(lo & P4_CCCR_ENABLE))
+                    seq_printf(m, " DISABLED");
+                else {
+                    // get_escr_msr()/get_escr() take the *counter* number,
+                    // which is the CCCR index j -- not the cpu index i.
+                    unsigned long escr_msr = get_escr_msr(j, sel);
+
+                    seq_printf(m, " ESCR=%s", get_escr(j, sel));
+                    if ((escr_msr >= 0x3a0) && (escr_msr <= 0x3e1)) {
+                        unsigned long e = c->escr[(int)(escr_msr - 0x3a0)].lo;
+
+                        seq_printf(m, "(%08lx es=%lx mask=%lx", e,
+                                   (e >> 25) & 0x7f,
+                                   (e >> 9) & 0xffff);
+                        if ((e & P4_ESCR_T0_USR))
+                            seq_printf(m, " T(0)USR");
+                        if ((e & P4_ESCR_T0_OS))
+                            seq_printf(m, " T(0)OS");
+                        if ((e & P4_ESCR_T1_USR))
+                            seq_printf(m, " T1USR");
+                        if ((e & P4_ESCR_T1_OS))
+                            seq_printf(m, " T1OS");
+                        seq_printf(m, ")");
+                    }
+                    // Active-thread field: CCCR bits 16..17.
+                    seq_printf(m, " AT=%u", (int)((lo >> 16) & 3));
+
+                    if ((lo & P4_CCCR_OVF))
+                        seq_printf(m, " OVF");
+                    if ((lo & P4_CCCR_CASCADE))
+                        seq_printf(m, " CASC");
+                    if ((lo & P4_CCCR_FORCE_OVF))
+                        seq_printf(m, " F-OVF");
+                    if ((lo & P4_CCCR_EDGE))
+                        seq_printf(m, " EDGE");
+                    if ((lo & P4_CCCR_COMPLEMENT))
+                        seq_printf(m, " COMPL");
+                    if ((lo & P4_CCCR_COMPARE))
+                        seq_printf(m, " CMP");
+                    if ((lo & P4_CCCR_OVF_PMI_T0))
+                        seq_printf(m, " OVF_PMI(_T0)");
+                    if ((lo & P4_CCCR_OVF_PMI_T1))
+                        seq_printf(m, " OVF_PMI_T1");
+                }
+                seq_printf(m, "\n");
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*****************************************************************************
+ * Handle writes *
+ *****************************************************************************/
+
+/*
+ * Parameters of the pending MSR write.  perfcntr_write() fills them in and
+ * perfcntr_write_for() consumes them on each CPU.
+ */
+static int set_msr_cpu_mask;            /* bit n set => CPU n is targeted   */
+static unsigned long set_msr_addr;      /* MSR address                      */
+static unsigned long set_msr_lo;        /* low 32 bits of the value         */
+static unsigned long set_msr_hi;        /* high 32 bits of the value        */
+
+/*
+ * Per-CPU worker: write the pending value to the pending MSR.  When built
+ * with NOHT the write is skipped on CPUs whose bit is clear in
+ * set_msr_cpu_mask; otherwise every CPU that runs this performs the write.
+ */
+static void perfcntr_write_for(void *unused)
+{
+#ifdef NOHT
+    if (!((1 << smp_processor_id()) & set_msr_cpu_mask))
+        return;
+#endif
+    wrmsr(set_msr_addr, set_msr_lo, set_msr_hi);
+}
+
+/*
+ * write() handler for the /proc entries.
+ *
+ * Expects three hexadecimal fields separated by single characters:
+ *
+ *     <cpu_mask> <msr_address> <64-bit value>
+ *
+ * The fields are stored in the set_msr_* variables and the write is then
+ * performed by perfcntr_write_for() on the other CPUs and on this one.
+ *
+ * Returns size on success, or -EINVAL when the input ends before the
+ * third field.
+ *
+ * NOTE(review): data is dereferenced directly by the parser; if it is a
+ * user-space pointer it should first be copied with copy_from_user() into a
+ * NUL-terminated kernel buffer.  Not changed here because that needs an
+ * extra header.
+ */
+ssize_t perfcntr_write(struct file *f,
+                       const char *data,
+                       size_t size,
+                       loff_t *pos)
+{
+    char *endp;
+    unsigned long long v;
+
+    set_msr_cpu_mask = (int)simple_strtoul(data, &endp, 16);
+    endp++; // skip past space
+    if ((endp - data) >= size)
+        return -EINVAL;
+
+    set_msr_addr = simple_strtoul(endp, &endp, 16);
+    endp++; // skip past space
+    if ((endp - data) >= size)
+        return -EINVAL;
+
+    // Parse the full 64-bit value: simple_strtoul() returns unsigned long,
+    // which is 32 bits wide on i386 and would always leave the high word 0.
+    v = simple_strtoull(endp, &endp, 16);
+    set_msr_lo = (unsigned long)(v & 0xffffffffULL);
+    set_msr_hi = (unsigned long)(v >> 32);
+
+    // Run on every other CPU (waiting for completion), then locally.
+    smp_call_function(perfcntr_write_for, NULL, 1, 1);
+    perfcntr_write_for(NULL);
+
+    return size;
+}
+
+/*****************************************************************************
+ * /proc stuff *
+ *****************************************************************************/
+
+/*
+ * seq_file "start" callback: the sequence has a single record, so only
+ * position 0 yields an item (foobar); any other position ends iteration.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+    if (*pos != 0)
+        return NULL;
+
+    return foobar;
+}
+
+/*
+ * seq_file "next" callback: advance the position by one and let c_start()
+ * decide whether an item exists there.
+ */
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+    *pos += 1;
+
+    return c_start(m, pos);
+}
+
+/* seq_file "stop" callback: intentionally empty, it performs no work. */
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+/* Iterator callbacks for the counter listing; output comes from show_perfcntr(). */
+struct seq_operations perfcntr_op = {
+    .start = c_start,
+    .next  = c_next,
+    .stop  = c_stop,
+    .show  = show_perfcntr,
+};
+
+/* Iterator callbacks for the configuration listing; output comes from show_perfcfg(). */
+struct seq_operations perfcfg_op = {
+    .start = c_start,
+    .next  = c_next,
+    .stop  = c_stop,
+    .show  = show_perfcfg,
+};
+
+/* open() handler: attach the perfcntr_op iterator to the file; returns seq_open()'s result. */
+static int perfcntr_open(struct inode *inode, struct file *file)
+{
+    int rc = seq_open(file, &perfcntr_op);
+
+    return rc;
+}
+
+/* open() handler: attach the perfcfg_op iterator to the file; returns seq_open()'s result. */
+static int perfcfg_open(struct inode *inode, struct file *file)
+{
+    int rc = seq_open(file, &perfcfg_op);
+
+    return rc;
+}
+
+/* File operations for the counter entry: seq_file reads, perfcntr_write() writes. */
+static struct file_operations proc_perfcntr_operations = {
+    .open    = perfcntr_open,
+    .read    = seq_read,
+    .write   = perfcntr_write,
+    .llseek  = seq_lseek,
+    .release = seq_release,
+};
+
+/* File operations for the configuration entry: seq_file reads, perfcntr_write() writes. */
+static struct file_operations proc_perfcfg_operations = {
+    .open    = perfcfg_open,
+    .read    = seq_read,
+    .write   = perfcntr_write,
+    .llseek  = seq_lseek,
+    .release = seq_release,
+};
+
+/*
+ * Create a top-level /proc entry called name with the given mode and hook
+ * up f as its file operations.  If the entry cannot be created nothing
+ * else happens and no error is reported.
+ */
+static void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
+{
+    struct proc_dir_entry *pde = create_proc_entry(name, mode, NULL);
+
+    if (!pde)
+        return;
+
+    pde->proc_fops = f;
+}
+
+/*****************************************************************************
+ * Module init and cleanup *
+ *****************************************************************************/
+
+/*
+ * Module entry point: print the version banner and register the two /proc
+ * entries, "perfcntr" and "perfcntr_config".  Always returns 0.
+ */
+static int __init perfcntr_init(void)
+{
+    // Pass the banner as an argument so it is never parsed as a format string.
+    printk("%s", version);
+
+    // NOTE(review): mode 0777 lets every user write to these entries, and
+    // writes end up in wrmsr(); consider a restrictive mode such as 0600.
+    create_seq_entry("perfcntr", 0777, &proc_perfcntr_operations);
+    create_seq_entry("perfcntr_config", 0777, &proc_perfcfg_operations);
+
+    return 0;
+}
+
+/* Module exit point: remove the /proc entries registered by perfcntr_init(). */
+static void __exit perfcntr_exit(void)
+{
+    const char *entries[] = { "perfcntr", "perfcntr_config" };
+    unsigned int k;
+
+    for (k = 0; k < sizeof(entries) / sizeof(entries[0]); k++)
+        remove_proc_entry(entries[k], NULL);
+}
+
+module_init(perfcntr_init);
+module_exit(perfcntr_exit);
+
+/* End of $RCSfile$ */
--- /dev/null
+/*
+ * P4 Performance counter stuff.
+ *
+ * P4 Xeon with Hyperthreading has counters per physical package which can
+ * count events from either logical CPU. However, in many cases more than
+ * one ESCR and CCCR/counter can be used to count the same event. For instr or
+ * uops retired, use either ESCR0/IQ_CCCR0 or ESCR1/IQ_CCCR2.
+ *
+ * $Id: p4perf.h,v 1.2 2003/10/13 16:51:41 jrb44 Exp $
+ *
+ * $Log: p4perf.h,v $
+ * Revision 1.2 2003/10/13 16:51:41 jrb44
+ * *** empty log message ***
+ *
+ */
+
+#ifndef P4PERF_H
+#define P4PERF_H
+
+#ifdef __KERNEL__
+#include <asm/msr.h>
+#endif
+
+/*****************************************************************************
+ * Performance counter configuration. *
+ *****************************************************************************/
+
+/*
+ * P6-family event-select flag bits.  They are defined here only when
+ * P6_EVNTSEL_OS is not already defined (presumably by a kernel header --
+ * confirm), so an existing definition always wins.
+ */
+#ifndef P6_EVNTSEL_OS
+# define P6_EVNTSEL_OS (1 << 17)
+# define P6_EVNTSEL_USR (1 << 16)
+# define P6_EVNTSEL_E (1 << 18)
+# define P6_EVNTSEL_EN (1 << 22)
+#endif
+/* P6 event codes (by name: instructions retired / uops retired). */
+#define P6_PERF_INST_RETIRED 0xc0
+#define P6_PERF_UOPS_RETIRED 0xc2
+
+/*
+ * ESCR bit layout.  The plain USR/OS names are aliases of the T0 (first
+ * logical CPU) bits.  The function-like macros place a value into its
+ * field; their arguments are parenthesised so that compound expressions
+ * such as P4_ESCR_EVNTMASK(A | B) are shifted as a whole.
+ */
+#define P4_ESCR_USR (1 << 2)
+#define P4_ESCR_OS (1 << 3)
+#define P4_ESCR_T0_USR (1 << 2) /* First logical CPU */
+#define P4_ESCR_T0_OS (1 << 3)
+#define P4_ESCR_T1_USR (1 << 0) /* Second logical CPU */
+#define P4_ESCR_T1_OS (1 << 1)
+#define P4_ESCR_TE (1 << 4)
+#define P4_ESCR_THREADS(t) (t)
+#define P4_ESCR_TV(tag) ((tag) << 5)
+#define P4_ESCR_EVNTSEL(e) ((e) << 25) /* event select, starts at bit 25 */
+#define P4_ESCR_EVNTMASK(e) ((e) << 9) /* event mask, starts at bit 9 */
+
+/*
+ * Event-select codes, by name intended as arguments of P4_ESCR_EVNTSEL().
+ * Note that INSTR_RETIRED and UOP_TYPE share the value 0x02.
+ */
+#define P4_ESCR_EVNTSEL_FRONT_END 0x08
+#define P4_ESCR_EVNTSEL_EXECUTION 0x0c
+#define P4_ESCR_EVNTSEL_REPLAY 0x09
+#define P4_ESCR_EVNTSEL_INSTR_RETIRED 0x02
+#define P4_ESCR_EVNTSEL_UOPS_RETIRED 0x01
+#define P4_ESCR_EVNTSEL_UOP_TYPE 0x02
+#define P4_ESCR_EVNTSEL_RET_MBR_TYPE 0x05
+//#define P4_ESCR_EVNTSEL_RET_MBR_TYPE 0x04
+
+/*
+ * Event-mask bits, by name intended as arguments of P4_ESCR_EVNTMASK().
+ * Each group below belongs to one event; bits within a group are distinct
+ * and can be OR-ed together.
+ */
+/* Front-end event */
+#define P4_ESCR_EVNTMASK_FE_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_FE_BOGUS 0x02
+
+/* Execution event: four non-bogus and four bogus bits */
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS0 0x01
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS1 0x02
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS2 0x04
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS3 0x08
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS0 0x10
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS1 0x20
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS2 0x40
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS3 0x80
+
+/* Replay event */
+#define P4_ESCR_EVNTMASK_REPLAY_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_REPLAY_BOGUS 0x02
+
+/* Instructions retired: (non-)bogus x (not-)tagged */
+#define P4_ESCR_EVNTMASK_IRET_NB_NTAG 0x01
+#define P4_ESCR_EVNTMASK_IRET_NB_TAG 0x02
+#define P4_ESCR_EVNTMASK_IRET_B_NTAG 0x04
+#define P4_ESCR_EVNTMASK_IRET_B_TAG 0x08
+
+/* Uops retired */
+#define P4_ESCR_EVNTMASK_URET_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_URET_BOGUS 0x02
+
+/* Uop type */
+#define P4_ESCR_EVNTMASK_UOP_LOADS 0x02
+#define P4_ESCR_EVNTMASK_UOP_STORES 0x04
+
+/* RMBRT_* and RBRT_* below carry identical values for the same branch kinds */
+#define P4_ESCR_EVNTMASK_RMBRT_COND 0x02
+#define P4_ESCR_EVNTMASK_RMBRT_CALL 0x04
+#define P4_ESCR_EVNTMASK_RMBRT_RETURN 0x08
+#define P4_ESCR_EVNTMASK_RMBRT_INDIR 0x10
+
+#define P4_ESCR_EVNTMASK_RBRT_COND 0x02
+#define P4_ESCR_EVNTMASK_RBRT_CALL 0x04
+#define P4_ESCR_EVNTMASK_RBRT_RETURN 0x08
+#define P4_ESCR_EVNTMASK_RBRT_INDIR 0x10
+
+//#define P4_ESCR_EVNTMASK_INSTR_RETIRED 0x01 /* Non bogus, not tagged */
+//#define P4_ESCR_EVNTMASK_UOPS_RETIRED 0x01 /* Non bogus */
+
+/*
+ * CCCR bit layout.  P4_CCCR_OVF uses an unsigned constant because shifting
+ * a signed 1 into bit 31 overflows int and yields a negative value that
+ * sign-extends when combined with wider types.  Arguments of the
+ * function-like macros are parenthesised so compound expressions are
+ * shifted as a whole.
+ */
+#define P4_CCCR_OVF (1U << 31)
+#define P4_CCCR_CASCADE (1 << 30)
+#define P4_CCCR_FORCE_OVF (1 << 25)
+#define P4_CCCR_EDGE (1 << 24)
+#define P4_CCCR_COMPLEMENT (1 << 19)
+#define P4_CCCR_COMPARE (1 << 18)
+#define P4_CCCR_THRESHOLD(t) ((t) << 20)
+#define P4_CCCR_ENABLE (1 << 12)
+#define P4_CCCR_ESCR(escr) ((escr) << 13) /* ESCR select, starts at bit 13 */
+#define P4_CCCR_ACTIVE_THREAD(t) ((t) << 16) /* Set to 11 */
+#define P4_CCCR_OVF_PMI_T0 (1 << 26)
+#define P4_CCCR_OVF_PMI_T1 (1 << 27)
+#define P4_CCCR_RESERVED (3 << 16)
+#define P4_CCCR_OVF_PMI (1 << 26) /* same bit as P4_CCCR_OVF_PMI_T0 */
+
+// BPU
+// Counter and CCCR MSR addresses for BPU counters 0/1 and 2/3.
+#define MSR_P4_BPU_COUNTER0 0x300
+#define MSR_P4_BPU_COUNTER1 0x301
+#define MSR_P4_BPU_CCCR0 0x360
+#define MSR_P4_BPU_CCCR1 0x361
+
+#define MSR_P4_BPU_COUNTER2 0x302
+#define MSR_P4_BPU_COUNTER3 0x303
+#define MSR_P4_BPU_CCCR2 0x362
+#define MSR_P4_BPU_CCCR3 0x363
+
+// MSR addresses of the even-numbered (…_ESCR0) registers of this group.
+#define MSR_P4_BSU_ESCR0 0x3a0
+#define MSR_P4_FSB_ESCR0 0x3a2
+#define MSR_P4_MOB_ESCR0 0x3aa
+#define MSR_P4_PMH_ESCR0 0x3ac
+#define MSR_P4_BPU_ESCR0 0x3b2
+#define MSR_P4_IS_ESCR0 0x3b4
+#define MSR_P4_ITLB_ESCR0 0x3b6
+#define MSR_P4_IX_ESCR0 0x3c8
+
+// ESCR select numbers (0..7) for the registers above; they match the slot
+// order of the BPU tables (escr_map_0/escr_map_1) in module/perfcntr.c.
+#define P4_BSU_ESCR0_NUMBER 7
+#define P4_FSB_ESCR0_NUMBER 6
+#define P4_MOB_ESCR0_NUMBER 2
+#define P4_PMH_ESCR0_NUMBER 4
+#define P4_BPU_ESCR0_NUMBER 0
+#define P4_IS_ESCR0_NUMBER 1
+#define P4_ITLB_ESCR0_NUMBER 3
+#define P4_IX_ESCR0_NUMBER 5
+
+// Odd-numbered (…_ESCR1) registers: each address is its …_ESCR0 address + 1.
+#define MSR_P4_BSU_ESCR1 0x3a1
+#define MSR_P4_FSB_ESCR1 0x3a3
+#define MSR_P4_MOB_ESCR1 0x3ab
+#define MSR_P4_PMH_ESCR1 0x3ad
+#define MSR_P4_BPU_ESCR1 0x3b3
+#define MSR_P4_IS_ESCR1 0x3b5
+#define MSR_P4_ITLB_ESCR1 0x3b7
+#define MSR_P4_IX_ESCR1 0x3c9
+
+// Select numbers of the …_ESCR1 registers; same values as their …_ESCR0 twins.
+#define P4_BSU_ESCR1_NUMBER 7
+#define P4_FSB_ESCR1_NUMBER 6
+#define P4_MOB_ESCR1_NUMBER 2
+#define P4_PMH_ESCR1_NUMBER 4
+#define P4_BPU_ESCR1_NUMBER 0
+#define P4_IS_ESCR1_NUMBER 1
+#define P4_ITLB_ESCR1_NUMBER 3
+#define P4_IX_ESCR1_NUMBER 5
+
+// MS
+// Counter and CCCR MSR addresses for MS counters 0/1 and 2/3.
+#define MSR_P4_MS_COUNTER0 0x304
+#define MSR_P4_MS_COUNTER1 0x305
+#define MSR_P4_MS_CCCR0 0x364
+#define MSR_P4_MS_CCCR1 0x365
+
+#define MSR_P4_MS_COUNTER2 0x306
+#define MSR_P4_MS_COUNTER3 0x307
+#define MSR_P4_MS_CCCR2 0x366
+#define MSR_P4_MS_CCCR3 0x367
+
+// MSR addresses of the …_ESCR0 registers of this group.
+#define MSR_P4_MS_ESCR0 0x3c0
+#define MSR_P4_TBPU_ESCR0 0x3c2
+#define MSR_P4_TC_ESCR0 0x3c4
+
+// ESCR select numbers; they match the slot order of the MS tables
+// (escr_map_2/escr_map_3) in module/perfcntr.c.
+#define P4_MS_ESCR0_NUMBER 0
+#define P4_TBPU_ESCR0_NUMBER 2
+#define P4_TC_ESCR0_NUMBER 1
+
+// …_ESCR1 registers: each address is its …_ESCR0 address + 1.
+#define MSR_P4_MS_ESCR1 0x3c1
+#define MSR_P4_TBPU_ESCR1 0x3c3
+#define MSR_P4_TC_ESCR1 0x3c5
+
+// Select numbers of the …_ESCR1 registers; same values as their …_ESCR0 twins.
+#define P4_MS_ESCR1_NUMBER 0
+#define P4_TBPU_ESCR1_NUMBER 2
+#define P4_TC_ESCR1_NUMBER 1
+
+// FLAME
+// Counter and CCCR MSR addresses for FLAME counters 0/1 and 2/3.
+#define MSR_P4_FLAME_COUNTER0 0x308
+#define MSR_P4_FLAME_COUNTER1 0x309
+#define MSR_P4_FLAME_CCCR0 0x368
+#define MSR_P4_FLAME_CCCR1 0x369
+
+#define MSR_P4_FLAME_COUNTER2 0x30a
+#define MSR_P4_FLAME_COUNTER3 0x30b
+#define MSR_P4_FLAME_CCCR2 0x36a
+#define MSR_P4_FLAME_CCCR3 0x36b
+
+// MSR addresses of the …_ESCR0 registers of this group.
+#define MSR_P4_FIRM_ESCR0 0x3a4
+#define MSR_P4_FLAME_ESCR0 0x3a6
+#define MSR_P4_DAC_ESCR0 0x3a8
+#define MSR_P4_SAAT_ESCR0 0x3ae
+#define MSR_P4_U2L_ESCR0 0x3b0
+
+// ESCR select numbers; they match the slot order of the FLAME tables
+// (escr_map_4/escr_map_5) in module/perfcntr.c.  Number 4 is not used.
+#define P4_FIRM_ESCR0_NUMBER 1
+#define P4_FLAME_ESCR0_NUMBER 0
+#define P4_DAC_ESCR0_NUMBER 5
+#define P4_SAAT_ESCR0_NUMBER 2
+#define P4_U2L_ESCR0_NUMBER 3
+
+// …_ESCR1 registers: each address is its …_ESCR0 address + 1.
+#define MSR_P4_FIRM_ESCR1 0x3a5
+#define MSR_P4_FLAME_ESCR1 0x3a7
+#define MSR_P4_DAC_ESCR1 0x3a9
+#define MSR_P4_SAAT_ESCR1 0x3af
+#define MSR_P4_U2L_ESCR1 0x3b1
+
+// Select numbers of the …_ESCR1 registers; same values as their …_ESCR0 twins.
+#define P4_FIRM_ESCR1_NUMBER 1
+#define P4_FLAME_ESCR1_NUMBER 0
+#define P4_DAC_ESCR1_NUMBER 5
+#define P4_SAAT_ESCR1_NUMBER 2
+#define P4_U2L_ESCR1_NUMBER 3
+
+// IQ
+// Counter and CCCR MSR addresses for the six IQ counters (0..5).
+#define MSR_P4_IQ_COUNTER0 0x30c
+#define MSR_P4_IQ_COUNTER1 0x30d
+#define MSR_P4_IQ_CCCR0 0x36c
+#define MSR_P4_IQ_CCCR1 0x36d
+
+#define MSR_P4_IQ_COUNTER2 0x30e
+#define MSR_P4_IQ_COUNTER3 0x30f
+#define MSR_P4_IQ_CCCR2 0x36e
+#define MSR_P4_IQ_CCCR3 0x36f
+
+#define MSR_P4_IQ_COUNTER4 0x310
+#define MSR_P4_IQ_COUNTER5 0x311
+#define MSR_P4_IQ_CCCR4 0x370
+#define MSR_P4_IQ_CCCR5 0x371
+
+// MSR addresses of the even-numbered ESCRs of this group (CRU has 0, 2 and 4).
+#define MSR_P4_CRU_ESCR0 0x3b8
+#define MSR_P4_CRU_ESCR2 0x3cc
+#define MSR_P4_CRU_ESCR4 0x3e0
+#define MSR_P4_IQ_ESCR0 0x3ba
+#define MSR_P4_RAT_ESCR0 0x3bc
+#define MSR_P4_SSU_ESCR0 0x3be
+#define MSR_P4_ALF_ESCR0 0x3ca
+
+// ESCR select numbers; they match the slot order of the IQ "even" table
+// (escr_map_6) in module/perfcntr.c.
+#define P4_CRU_ESCR0_NUMBER 4
+#define P4_CRU_ESCR2_NUMBER 5
+#define P4_CRU_ESCR4_NUMBER 6
+#define P4_IQ_ESCR0_NUMBER 0
+#define P4_RAT_ESCR0_NUMBER 2
+#define P4_SSU_ESCR0_NUMBER 3
+#define P4_ALF_ESCR0_NUMBER 1
+
+// Odd-numbered ESCRs: each address is its even twin + 1.  No SSU_ESCR1 is
+// defined here, and the IQ "odd" table (escr_map_7) leaves that slot 0.
+#define MSR_P4_CRU_ESCR1 0x3b9
+#define MSR_P4_CRU_ESCR3 0x3cd
+#define MSR_P4_CRU_ESCR5 0x3e1
+#define MSR_P4_IQ_ESCR1 0x3bb
+#define MSR_P4_RAT_ESCR1 0x3bd
+#define MSR_P4_ALF_ESCR1 0x3cb
+
+// Select numbers of the odd-numbered ESCRs; same values as their even twins.
+#define P4_CRU_ESCR1_NUMBER 4
+#define P4_CRU_ESCR3_NUMBER 5
+#define P4_CRU_ESCR5_NUMBER 6
+#define P4_IQ_ESCR1_NUMBER 0
+#define P4_RAT_ESCR1_NUMBER 2
+#define P4_ALF_ESCR1_NUMBER 1
+
+/*
+ * Counter numbers 0..17.  Each number equals the counter's MSR address
+ * minus 0x300 (e.g. MSR_P4_IQ_COUNTER5 is 0x311, number 17), and the
+ * matching CCCR lives at 0x360 plus the same number.  get_escr_msr() in
+ * module/perfcntr.c switches on these values.
+ */
+#define P4_BPU_COUNTER0_NUMBER 0
+#define P4_BPU_COUNTER1_NUMBER 1
+#define P4_BPU_COUNTER2_NUMBER 2
+#define P4_BPU_COUNTER3_NUMBER 3
+
+#define P4_MS_COUNTER0_NUMBER 4
+#define P4_MS_COUNTER1_NUMBER 5
+#define P4_MS_COUNTER2_NUMBER 6
+#define P4_MS_COUNTER3_NUMBER 7
+
+#define P4_FLAME_COUNTER0_NUMBER 8
+#define P4_FLAME_COUNTER1_NUMBER 9
+#define P4_FLAME_COUNTER2_NUMBER 10
+#define P4_FLAME_COUNTER3_NUMBER 11
+
+#define P4_IQ_COUNTER0_NUMBER 12
+#define P4_IQ_COUNTER1_NUMBER 13
+#define P4_IQ_COUNTER2_NUMBER 14
+#define P4_IQ_COUNTER3_NUMBER 15
+#define P4_IQ_COUNTER4_NUMBER 16
+#define P4_IQ_COUNTER5_NUMBER 17
+
+/* PEBS
+ * MSR addresses and bit definitions for PEBS_ENABLE and PEBS_MATRIX_VERT.
+ */
+#define MSR_P4_PEBS_ENABLE 0x3F1
+#define MSR_P4_PEBS_MATRIX_VERT 0x3F2
+
+#define P4_PEBS_ENABLE_MY_THR (1 << 25)
+#define P4_PEBS_ENABLE_OTH_THR (1 << 26)
+#define P4_PEBS_ENABLE (1 << 24)
+#define P4_PEBS_BIT0 (1 << 0)
+#define P4_PEBS_BIT1 (1 << 1)
+#define P4_PEBS_BIT2 (1 << 2)
+
+#define P4_PEBS_MATRIX_VERT_BIT0 (1 << 0)
+#define P4_PEBS_MATRIX_VERT_BIT1 (1 << 1)
+#define P4_PEBS_MATRIX_VERT_BIT2 (1 << 2)
+
+/* Replay tagging.
+ * For each tagging kind (suffix L1LMR, L2LMR, DTLMR, DTSMR, DTAMR) there is
+ * one _PEBS_ value built from the P4_PEBS_BITn bits and one _VERT_ value
+ * built from the P4_PEBS_MATRIX_VERT_BITn bits.
+ */
+#define P4_REPLAY_TAGGING_PEBS_L1LMR P4_PEBS_BIT0
+#define P4_REPLAY_TAGGING_PEBS_L2LMR P4_PEBS_BIT1
+#define P4_REPLAY_TAGGING_PEBS_DTLMR P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTSMR P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTAMR P4_PEBS_BIT2
+
+#define P4_REPLAY_TAGGING_VERT_L1LMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_L2LMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTLMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTSMR P4_PEBS_MATRIX_VERT_BIT1
+/* Parenthesised so the OR stays intact inside a larger expression. */
+#define P4_REPLAY_TAGGING_VERT_DTAMR (P4_PEBS_MATRIX_VERT_BIT0 | P4_PEBS_MATRIX_VERT_BIT1)
+
+
+
+
+/*****************************************************************************
+ * *
+ *****************************************************************************/
+
+// Event select codes (EVENT_SEL_*) and their event mask bits (EVENT_MASK_*),
+// grouped per event.  Presumably meant for P4_ESCR_EVNTSEL() and
+// P4_ESCR_EVNTMASK() respectively -- confirm against the users of this header.
+
+// x87_FP_uop
+#define EVENT_SEL_x87_FP_uop 0x04
+#define EVENT_MASK_x87_FP_uop_ALL (1 << 15)
+
+// execution event (at retirement)
+#define EVENT_SEL_execution_event 0x0C
+
+// scalar_SP_uop
+#define EVENT_SEL_scalar_SP_uop 0x0a
+#define EVENT_MASK_scalar_SP_uop_ALL (1 << 15)
+
+// scalar_DP_uop
+#define EVENT_SEL_scalar_DP_uop 0x0e
+#define EVENT_MASK_scalar_DP_uop_ALL (1 << 15)
+
+// Instruction retired
+#define EVENT_SEL_instr_retired 0x02
+#define EVENT_MASK_instr_retired_ALL 0x0f
+
+// uOps retired
+#define EVENT_SEL_uops_retired 0x01
+#define EVENT_MASK_uops_retired_ALL 0x03
+
+// L1 misses retired
+#define EVENT_SEL_replay_event 0x09
+#define EVENT_MASK_replay_event_ALL 0x03
+
+// Trace cache
+#define EVENT_SEL_BPU_fetch_request 0x03
+#define EVENT_MASK_BPU_fetch_request_TCMISS 0x01
+
+// Bus activity
+#define EVENT_SEL_FSB_data_activity 0x17
+#define EVENT_MASK_FSB_data_activity_DRDY_DRV 0x01
+#define EVENT_MASK_FSB_data_activity_DRDY_OWN 0x02
+#define EVENT_MASK_FSB_data_activity_DRDY_OOTHER 0x04
+#define EVENT_MASK_FSB_data_activity_DBSY_DRV 0x08
+#define EVENT_MASK_FSB_data_activity_DBSY_OWN 0x10
+#define EVENT_MASK_FSB_data_activity_DBSY_OOTHER 0x20
+
+// Cache L2
+// Mask bits 0x001..0x004 are L2 read hits, 0x008..0x020 are L3 read hits
+// and 0x100..0x400 are misses, as the names say.
+#define EVENT_SEL_BSQ_cache_reference 0x0c
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITS 0x001
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITE 0x002
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITM 0x004
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITS 0x008
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITE 0x010
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITM 0x020
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_MISS 0x100
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_MISS 0x200
+#define EVENT_MASK_BSQ_cache_reference_WR_L2_MISS 0x400
+
+#endif
+
+/* End of $RCSfile: p4perf.h,v $ */